Tanzania fertilizer use efficiency, profitability and risks across time

Author

Maxwell Mkondiwa (m.mkondiwa@cgiar.org) and Jordan Chamberlin (j.chamberlin@cgiar.org)

1 Introduction

This notebook provides R code for the paper on estimating the heterogenous treatment effects in Tanzania using non-parametric geoadditive and causal machine learning (ML) models. The analytics focus on addressing the following analysis questions:

2 Exploration

Code
# package names
packages <- c("gplots", "modelsummary", "grf", "policytree", "ggplot2", "micEcon", "frontier", "dplyr", "tidyr", "knitr", "car", "RColorBrewer", "DT", "rio", "tidyr", "dsfa", "mgcv", "geodata", "sf", "mapview", "dplyr", "terra", "raster", "ggridges", "rio", "BART", "BART", "BayesTree", "bartCause", "plm", "rlearner")

# install packages
# installed_packages <- packages %in% rownames(installed.packages())
# if (any(installed_packages == FALSE)) {
#     install.packages(packages[!installed_packages])
# }

# load packages
invisible(lapply(packages, library, character.only = TRUE))
# install.packages("collapse", repos = "https://fastverse.r-universe.dev")
library(rio)


#tz_lsms_panel <- import("tz_lsms_panel_plot_level_221117.dta")
tz_lsms_panel <- import("tz_lsms_panel.csv")

tz_lsms_panel$yld_[tz_lsms_panel$yld_ > quantile(tz_lsms_panel$yld_, 0.99, na.rm = TRUE)] <- NA
tz_lsms_panel$N_kgha[tz_lsms_panel$N_kgha > quantile(tz_lsms_panel$N_kgha, 0.99, na.rm = TRUE)] <- NA

tz_lsms_panel$plotha[tz_lsms_panel$plotha > quantile(tz_lsms_panel$plotha, 0.99, na.rm = TRUE)] <- NA

tz_lsms_panel$N_kgha_dum <- 0
tz_lsms_panel$N_kgha_dum[tz_lsms_panel$N_kgha > 0] <- 1

tz_lsms_panel$N_kgha_dum <- as.numeric(tz_lsms_panel$N_kgha_dum)

tz_lsms_panel$N_kgha_cond <- tz_lsms_panel$N_kgha
tz_lsms_panel$N_kgha_cond[tz_lsms_panel$N_kgha_dum == 0] <- NA

tz_lsms_panel$N_kgha_cond <- as.numeric(tz_lsms_panel$N_kgha_cond)


tz_lsms_panel=subset(tz_lsms_panel, !(is.na(tz_lsms_panel$yld_)))
tz_lsms_panel=subset(tz_lsms_panel, !(is.na(tz_lsms_panel$N_kgha)))
tz_lsms_panel=subset(tz_lsms_panel, !(is.na(tz_lsms_panel$plotha)))
tz_lsms_panel=subset(tz_lsms_panel, !(is.na(tz_lsms_panel$P_kgha)))
tz_lsms_panel=subset(tz_lsms_panel, !(is.na(tz_lsms_panel$harv_yld_mai)))

tz_lsms_panel$soc_5_15cm=tz_lsms_panel$`soc_5-15cm`
tz_lsms_panel$nitrogen_0_5cm=tz_lsms_panel$`nitrogen_0-5cm`
tz_lsms_panel$sand_0_5cm=tz_lsms_panel$`sand_0-5cm`
tz_lsms_panel$ECN_5_15cm=tz_lsms_panel$`ECN_5-15cm`
tz_lsms_panel$pH_0_5cm=tz_lsms_panel$`pH_0-5cm`

3 Descriptives table

Code
library(modelsummary)

mean_na <- function(x) mean(x, na.rm = TRUE)
sd_na <- function(x) SD(x, na.rm = TRUE)
summary_table_by_year <- datasummary(Heading("N obs") * N + Heading("%") * Percent() + (yld_ + N_kgha_dum  +N_kgha_cond+ N_kgha + P_kgha+plotha + ncrops + hhmem + femhead + headeduc + arain_tot+arain_cv) * (mean_na + sd_na) ~ Factor(year), data = tz_lsms_panel, output = "data.frame")

summary_table_by_year_mean <- datasummary(Heading("N obs") * N + Heading("%") * Percent() + (yld_ + N_kgha_dum  +N_kgha_cond+ N_kgha + P_kgha+plotha + ncrops + hhmem + femhead + headeduc + arain_tot+arain_cv) * (mean_na) ~ Factor(year), data = tz_lsms_panel, output = "data.frame")




library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('summary_table_by_year', 'summary_table_by_year.csv')"
        ),
        reactable(
            summary_table_by_year,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "summary_table_by_year"
        )
    )
)
Code
library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('summary_table_by_year_mean', 'summary_table_by_year_mean.csv')"
        ),
        reactable(
            summary_table_by_year_mean,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "summary_table_by_year_mean"
        )
    )
)
Code
# # By district and by year
# summary_table_by_year_dist <- datasummary(Heading("N obs") * N + Heading("%") * Percent() + (yld_ + N_kgha_g_0  + N_kgha + plotha + P_kgha + ncrops + hhmem + femhead + headeduc + arain_tot) * (mean_na + sd_na) ~ Factor(year*district), data = tz_lsms_panel, output = "data.frame")
# 
# library(reactable)
# library(htmltools)
# library(fontawesome)
# 
# htmltools::browsable(
#     tagList(
#         tags$button(
#             tagList(fontawesome::fa("download"), "Download as CSV"),
#             onclick = "Reactable.downloadDataCSV('summary_table_by_year_dist', 'summary_table_by_year_dist.csv')"
#         ),
#         reactable(
#             summary_table_by_year,
#             searchable = TRUE,
#             defaultPageSize = 38,
#             elementId = "summary_table_by_year_dist"
#         )
#     )
# )


tz_lsms_panel$region=as.factor(tz_lsms_panel$region)
tz_lsms_panel$year=as.factor(tz_lsms_panel$year)

# By district and by year
summary_table_by_year_region <- datasummary(Heading("N obs") * N + Heading("%") * Percent() + (yld_ + N_kgha_dum  +N_kgha_cond+ N_kgha + P_kgha+plotha+ ncrops + hhmem + femhead + headeduc + arain_tot)*(mean_na) ~ year*region, data = tz_lsms_panel, output = "data.frame")

summary_table_by_year_region_mean <- datasummary(Heading("N obs") * N + Heading("%") * Percent() + (yld_ + N_kgha_dum  +N_kgha_cond+ N_kgha + P_kgha+plotha + ncrops + hhmem + femhead + headeduc + arain_tot)*(mean_na) ~ year*region, data = tz_lsms_panel, output = "data.frame")


summary_table_by_year_region_t=t(summary_table_by_year_region)
summary_table_by_year_region_mean_t=t(summary_table_by_year_region_mean)

library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('summary_table_by_year_region_t', 'summary_table_by_year_region_t.csv')"
        ),
        reactable(
            summary_table_by_year_region_t,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "summary_table_by_year_region_t"
        )
    )
)
Code
library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('summary_table_by_year_region_mean_t', 'summary_table_by_year_region_mean_t.csv')"
        ),
        reactable(
            summary_table_by_year_region_mean_t,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "summary_table_by_year_region_mean_t"
        )
    )
)

4 Graphics

Code
library(gplots)
plotmeans(yld_ ~ year, main="Yield heterogeineity across years",xlab="Year", ylab="Maize yield (kg/ha)", data=tz_lsms_panel)

Code
library(gplots)
plotmeans(N_kgha ~ year, main="N per ha heterogeineity across years", data=tz_lsms_panel,xlab="Year", ylab="Average N per ha" )

Code
plotmeans(arain_tot~ year, main="Rainfall across years", data=tz_lsms_panel,xlab="Year", ylab="Average total rainfall (mm)" )

Code
library(easynls)
library(lme4)
library(data.table)
library(ggplot2)

# summary_table_by_year_mean_t=t(summary_table_by_year_mean)
# 
# ggplot(summary_table_by_year_mean) +
#   geom_line(aes(x=N, y=Y, group=year, color=year))
# 
# ggplot(summary_table_by_year_region) +
#   geom_line(aes(x=N, y=Y, group=region, color=region))

5 Conventional Production Function Approach

5.1 Linear and non-linear parameter models: e.g., Quadratic

Code
library(data.table)
library(ggplot2)
library(easynls)
library(lme4)
library(nlme)

tz_lsms_panel=data.table(tz_lsms_panel)
tz_lsms_panel$year=as.factor(tz_lsms_panel$year)

#tz_lsms_panel_sf_subset=subset(tz_lsms_panel_sf, #select=c("V1","yld_","harv_yld_mai","N_kgha","P_kgha","year","soc_5-15cm","population_density","wc2.1_30s_elev","#sand_0-5cm","nitrogen_0-5cm","ECN_5-15cm","pH_0-5cm"))                                                      

#library(tidyr)
#tz_lsms_panel_sf_subset=tz_lsms_panel_sf_subset %>% drop_na()

# Linear
baseline_ols=lm(yld_~N_kgha+plotha+P_kgha+ncrops+hhmem+femhead+headeduc+arain_tot+region+year,data=tz_lsms_panel)

summary(baseline_ols)

Call:
lm(formula = yld_ ~ N_kgha + plotha + P_kgha + ncrops + hhmem + 
    femhead + headeduc + arain_tot + region + year, data = tz_lsms_panel)

Residuals:
    Min      1Q  Median      3Q     Max 
-2276.9  -440.5  -178.2   201.4  5410.4 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  7.339e+02  5.672e+01  12.937  < 2e-16 ***
N_kgha       8.740e+00  4.734e-01  18.463  < 2e-16 ***
plotha      -1.358e+02  6.983e+00 -19.450  < 2e-16 ***
P_kgha       5.709e+00  7.465e-01   7.647 2.26e-14 ***
ncrops      -5.799e+01  9.087e+00  -6.382 1.83e-10 ***
hhmem        1.498e+01  2.681e+00   5.590 2.34e-08 ***
femhead     -8.102e+01  1.881e+01  -4.306 1.68e-05 ***
headeduc     5.489e+01  1.269e+01   4.326 1.53e-05 ***
arain_tot   -9.941e-03  2.352e-02  -0.423 0.672523    
region2      3.340e+02  5.912e+01   5.649 1.66e-08 ***
region3      1.898e+02  5.912e+01   3.210 0.001330 ** 
region4      1.639e+02  5.139e+01   3.190 0.001427 ** 
region5      9.571e+01  5.310e+01   1.802 0.071506 .  
region6     -2.386e+02  8.462e+01  -2.819 0.004823 ** 
region7      7.442e+01  1.028e+02   0.724 0.469276    
region8     -1.022e+02  5.462e+01  -1.870 0.061448 .  
region9     -1.677e+02  4.873e+01  -3.441 0.000583 ***
region10     5.772e+01  5.082e+01   1.136 0.256121    
region11     2.204e+02  4.854e+01   4.541 5.67e-06 ***
region12     3.337e+02  4.719e+01   7.073 1.63e-12 ***
region13     1.397e+02  6.147e+01   2.272 0.023080 *  
region14    -1.022e+00  4.787e+01  -0.021 0.982975    
region15     4.573e+02  5.585e+01   8.189 2.99e-16 ***
region16    -3.991e+01  5.644e+01  -0.707 0.479530    
region17    -3.052e+01  5.167e+01  -0.591 0.554778    
region18    -1.510e+02  6.044e+01  -2.498 0.012512 *  
region19    -1.064e+02  5.697e+01  -1.869 0.061714 .  
region20    -2.175e+01  7.066e+01  -0.308 0.758166    
region21     4.673e+02  6.363e+01   7.344 2.24e-13 ***
region22     1.517e+01  8.852e+01   0.171 0.863907    
region23     4.757e+02  8.977e+01   5.299 1.19e-07 ***
region24     9.621e+01  6.453e+01   1.491 0.136034    
region25     1.258e+02  8.649e+01   1.454 0.145846    
region26     3.356e+02  1.713e+02   1.960 0.050049 .  
region51    -9.463e+01  2.206e+02  -0.429 0.668021    
region52    -1.786e+02  3.095e+02  -0.577 0.563956    
region53     9.781e+02  5.327e+02   1.836 0.066384 .  
region54    -9.700e+01  1.732e+02  -0.560 0.575529    
region55    -4.864e+02  2.869e+02  -1.695 0.090023 .  
year2010     5.237e+01  2.602e+01   2.013 0.044176 *  
year2012     2.558e+01  2.514e+01   1.017 0.308957    
year2014     2.145e+02  2.796e+01   7.671 1.87e-14 ***
year2020     1.759e+02  2.990e+01   5.885 4.13e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 751 on 9318 degrees of freedom
  (65 observations deleted due to missingness)
Multiple R-squared:  0.1938,    Adjusted R-squared:  0.1902 
F-statistic: 53.33 on 42 and 9318 DF,  p-value: < 2.2e-16
Code
# Lm List by year
baseline_ols_yearList <- lmList(yld_~N_kgha+plotha+P_kgha+ncrops+hhmem+femhead+headeduc+arain_tot|year,tz_lsms_panel)

coef(baseline_ols_yearList)
     (Intercept)    N_kgha    plotha     P_kgha     ncrops     hhmem    femhead
2008    930.9606  8.668885 -124.1790 15.4458992  -98.64764 17.373137  -83.91927
2010    856.5553  7.088733 -171.6598  9.6837787  -89.22438 17.911735  -81.27483
2012    710.1052 11.511511 -128.0661  0.1649598  -33.34007  9.368089  -45.69876
2014    998.3947 13.024823 -101.8081  7.4553128 -106.47347 14.976799 -107.49563
2020   1087.9859  9.960142 -117.3886  7.9320702  -74.38693 12.197603 -112.98479
     headeduc   arain_tot
2008 67.08141 -0.04669357
2010 20.82818  0.10950019
2012 38.88662  0.10755817
2014 63.40795  0.04365176
2020 37.54491 -0.06323464
Code
(ci <- confint(baseline_ols_yearList))
An object of class "lmList4.confint"
, , (Intercept)

        2.5 %    97.5 %
2008 753.6952 1108.2260
2010 693.6715 1019.4392
2012 587.1459  833.0646
2014 843.9498 1152.8395
2020 930.4129 1245.5588

, , N_kgha

         2.5 %    97.5 %
2008  6.804201 10.533569
2010  5.135811  9.041654
2012  9.739294 13.283728
2014 10.776845 15.272801
2020  7.820350 12.099934

, , plotha

         2.5 %     97.5 %
2008 -160.8936  -87.46432
2010 -204.6546 -138.66503
2012 -152.5085 -103.62371
2014 -133.6674  -69.94889
2020 -148.1365  -86.64075

, , P_kgha

         2.5 %    97.5 %
2008 10.791759 20.100039
2010  6.328560 13.038997
2012 -2.355263  2.685182
2014  3.618337 11.292289
2020  4.595312 11.268829

, , ncrops

          2.5 %      97.5 %
2008 -139.15730 -58.1379880
2010 -131.95260 -46.4961685
2012  -67.49843   0.8182895
2014 -146.46833 -66.4786011
2020 -116.89111 -31.8827618

, , hhmem

         2.5 %   97.5 %
2008 3.8924144 30.85386
2010 6.0049495 29.81852
2012 0.1587785 18.57740
2014 2.3759461 27.57765
2020 1.0715666 23.32364

, , femhead

         2.5 %     97.5 %
2008 -172.0813   4.242785
2010 -165.9178   3.368091
2012 -117.3159  25.918380
2014 -199.0045 -15.986792
2020 -199.0741 -26.895477

, , headeduc

           2.5 %    97.5 %
2008  -0.6231079 134.78593
2010 -43.0080862  84.66444
2012  -7.4391648  85.21241
2014   5.6864344 121.12947
2020 -16.8345142  91.92433

, , arain_tot

           2.5 %     97.5 %
2008 -0.18387466 0.09048751
2010 -0.01880891 0.23780928
2012  0.01570261 0.19941373
2014 -0.05366435 0.14096787
2020 -0.13928780 0.01281851
Code
plot(ci,cex=2, cex.lab=3, xlab="Maize yield (kg/ha) response", ylab="Year")

Code
## LINEAR PLATEAU
# library(easynls)
# library(data.table)
# tz_lsms_panel <- data.table(tz_lsms_panel)
# tz_lsms_panel.temp <- tz_lsms_panel[, .("yld_","N_kgha")]
# nls.LP <- nlsfit(tz_lsms_panel.temp, model = 3)
# nls.LP$Model
# lp_model=nls.LP$Parameters
# lp_model
# summary(lp_model)

# QUADRATIC

baseline_quad=lm(yld_~N_kgha+I(N_kgha^2)+P_kgha+I(P_kgha^2)+plotha+ncrops+hhmem+femhead+headeduc+arain_tot+region+year,data=tz_lsms_panel)

summary(baseline_quad)

Call:
lm(formula = yld_ ~ N_kgha + I(N_kgha^2) + P_kgha + I(P_kgha^2) + 
    plotha + ncrops + hhmem + femhead + headeduc + arain_tot + 
    region + year, data = tz_lsms_panel)

Residuals:
    Min      1Q  Median      3Q     Max 
-2389.1  -439.4  -179.0   203.8  5406.2 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  7.325e+02  5.666e+01  12.928  < 2e-16 ***
N_kgha       4.423e+00  1.306e+00   3.385 0.000713 ***
I(N_kgha^2)  4.949e-02  1.470e-02   3.367 0.000763 ***
P_kgha       1.094e+01  1.373e+00   7.969 1.78e-15 ***
I(P_kgha^2) -4.156e-02  9.479e-03  -4.384 1.18e-05 ***
plotha      -1.353e+02  6.978e+00 -19.387  < 2e-16 ***
ncrops      -5.741e+01  9.077e+00  -6.325 2.65e-10 ***
hhmem        1.492e+01  2.677e+00   5.573 2.57e-08 ***
femhead     -8.131e+01  1.879e+01  -4.327 1.53e-05 ***
headeduc     5.550e+01  1.268e+01   4.378 1.21e-05 ***
arain_tot   -9.895e-03  2.349e-02  -0.421 0.673564    
region2      3.357e+02  5.905e+01   5.686 1.34e-08 ***
region3      2.043e+02  5.915e+01   3.454 0.000555 ***
region4      1.646e+02  5.133e+01   3.207 0.001347 ** 
region5      9.552e+01  5.303e+01   1.801 0.071696 .  
region6     -2.375e+02  8.451e+01  -2.811 0.004953 ** 
region7      7.094e+01  1.027e+02   0.691 0.489745    
region8     -1.020e+02  5.455e+01  -1.870 0.061488 .  
region9     -1.643e+02  4.867e+01  -3.375 0.000740 ***
region10     7.777e+01  5.105e+01   1.523 0.127720    
region11     2.355e+02  4.886e+01   4.820 1.46e-06 ***
region12     3.320e+02  4.732e+01   7.017 2.43e-12 ***
region13     1.442e+02  6.141e+01   2.349 0.018861 *  
region14     6.149e+00  4.788e+01   0.128 0.897820    
region15     4.597e+02  5.578e+01   8.241  < 2e-16 ***
region16    -3.761e+01  5.639e+01  -0.667 0.504791    
region17    -2.864e+01  5.160e+01  -0.555 0.578870    
region18    -1.489e+02  6.037e+01  -2.466 0.013672 *  
region19    -1.047e+02  5.689e+01  -1.841 0.065675 .  
region20    -2.094e+01  7.057e+01  -0.297 0.766618    
region21     4.676e+02  6.354e+01   7.358 2.02e-13 ***
region22     2.653e+01  8.873e+01   0.299 0.764949    
region23     4.824e+02  8.967e+01   5.380 7.65e-08 ***
region24     9.939e+01  6.445e+01   1.542 0.123071    
region25     1.260e+02  8.639e+01   1.458 0.144860    
region26     3.143e+02  1.711e+02   1.837 0.066268 .  
region51    -9.447e+01  2.204e+02  -0.429 0.668140    
region52    -1.776e+02  3.091e+02  -0.574 0.565656    
region53     9.799e+02  5.320e+02   1.842 0.065544 .  
region54    -9.773e+01  1.730e+02  -0.565 0.572163    
region55    -4.847e+02  2.865e+02  -1.692 0.090688 .  
year2010     5.492e+01  2.600e+01   2.112 0.034681 *  
year2012     2.679e+01  2.512e+01   1.067 0.286225    
year2014     2.118e+02  2.794e+01   7.583 3.70e-14 ***
year2020     1.729e+02  2.988e+01   5.785 7.50e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 750.1 on 9316 degrees of freedom
  (65 observations deleted due to missingness)
Multiple R-squared:  0.1961,    Adjusted R-squared:  0.1923 
F-statistic: 51.63 on 44 and 9316 DF,  p-value: < 2.2e-16
Code
library(modelsummary)

modelplot(baseline_quad)

Code
# Lm List by year
baseline_quad_lmList <- lmList(yld_~N_kgha+I(N_kgha^2)+P_kgha+I(P_kgha^2)+ncrops+hhmem+femhead+headeduc+arain_tot|year,tz_lsms_panel)

coef(baseline_quad_lmList )
     (Intercept)    N_kgha I(N_kgha^2)    P_kgha I(P_kgha^2)     ncrops
2008    945.9293  8.669655 0.005733987 30.893305 -0.18122928 -138.24253
2010    914.1726  2.701333 0.062520511 13.447263 -0.02296349 -136.98338
2012    775.0622 10.481262 0.010145979  5.574382 -0.03222565  -83.39304
2014   1003.9905  6.881725 0.076143989 10.590570 -0.02387429 -136.21425
2020   1101.4899  6.513740 0.044234354 13.073281 -0.05436070 -107.21972
         hhmem   femhead headeduc   arain_tot
2008  6.617584 -44.55724 51.45516 -0.05499994
2010  6.226192 -33.31606 30.40090  0.04035023
2012 -3.489106 -24.41176 44.82791  0.07694600
2014  7.668938 -70.56392 83.21657  0.02836584
2020  2.605807 -82.02956 45.70478 -0.08140485
Code
(ci <- confint(baseline_quad_lmList ))
An object of class "lmList4.confint"
, , (Intercept)

        2.5 %    97.5 %
2008 766.6400 1125.2186
2010 747.1990 1081.1463
2012 649.5676  900.5568
2014 847.5802 1160.4008
2020 941.4019 1261.5779

, , N_kgha

          2.5 %    97.5 %
2008  3.3473457 13.991964
2010 -2.9198997  8.322565
2012  5.5844073 15.378116
2014  0.2449181 13.518532
2020  0.3340659 12.693413

, , I(N_kgha^2)

            2.5 %     97.5 %
2008 -0.053134742 0.06460272
2010 -0.007538666 0.13257969
2012 -0.049837871 0.07012983
2014  0.002136701 0.15015128
2020 -0.025571957 0.11404067

, , P_kgha

          2.5 %   97.5 %
2008 21.0406311 40.74598
2010  6.2302947 20.66423
2012  0.7481391 10.40063
2014  2.5820769 18.59906
2020  5.3655723 20.78099

, , I(P_kgha^2)

           2.5 %       97.5 %
2008 -0.28275513 -0.079703416
2010 -0.06821233  0.022285346
2012 -0.05863059 -0.005820717
2014 -0.09455065  0.046802074
2020 -0.13372466  0.025003254

, , ncrops

         2.5 %    97.5 %
2008 -177.4989 -98.98618
2010 -179.7703 -94.19644
2012 -116.8634 -49.92267
2014 -175.4900 -96.93850
2020 -149.4259 -65.01351

, , hhmem

          2.5 %    97.5 %
2008  -6.660103 19.895271
2010  -5.772839 18.225223
2012 -12.566201  5.587988
2014  -4.900482 20.238359
2020  -8.395775 13.607388

, , femhead

          2.5 %    97.5 %
2008 -133.19076 44.076285
2010 -119.63799 53.005874
2012  -97.47164 48.648122
2014 -162.40548 21.277648
2020 -168.99237  4.933259

, , headeduc

          2.5 %    97.5 %
2008 -16.941986 119.85231
2010 -35.134573  95.93637
2012  -2.467796  92.12363
2014  25.027989 141.40515
2020  -9.505510 100.91507

, , arain_tot

           2.5 %       97.5 %
2008 -0.19385912  0.083859233
2010 -0.09062053  0.171320984
2012 -0.01683419  0.170726189
2014 -0.06992302  0.126654699
2020 -0.15843944 -0.004370266
Code
plot(ci,cex=2, cex.lab=3, xlab="Maize yield (kg/ha) response", ylab="Year")

Code
## with interactions
# Lm List by year
baseline_quad_lmList_inter <- lmList(yld_~N_kgha*arain_tot+I(N_kgha^2)*arain_tot+P_kgha+I(P_kgha^2)+ncrops+hhmem+femhead+headeduc|year,tz_lsms_panel)

coef(baseline_quad_lmList_inter )
     (Intercept)     N_kgha   arain_tot I(N_kgha^2)    P_kgha I(P_kgha^2)
2008   1042.1596 -0.6456866 -0.14672379 -0.02729364 30.208893 -0.17461650
2010    922.1706 11.1723352  0.03089159 -0.13588013 12.931895 -0.01856589
2012    780.8833 14.3400949  0.06984809 -0.10951256  5.188228 -0.03105019
2014    991.2959 -1.2471849  0.04295506  0.25209616 10.142296 -0.02497476
2020   1089.1133  3.6589824 -0.07402839  0.12946940 12.966264 -0.05625349
         ncrops     hhmem   femhead headeduc N_kgha:arain_tot
2008 -139.52926  6.212785 -46.41849 59.66394      0.009636395
2010 -136.72555  6.519137 -34.49700 30.41650     -0.008428748
2012  -83.04251 -3.408271 -25.20143 42.76124     -0.003471359
2014 -136.58886  7.248988 -71.75784 80.75932      0.007491750
2020 -107.40424  2.654036 -81.80697 47.74786      0.002517218
     arain_tot:I(N_kgha^2)
2008          2.133942e-05
2010          1.976315e-04
2012          1.217391e-04
2014         -1.610832e-04
2020         -6.885562e-05
Code
(ci <- confint(baseline_quad_lmList_inter))
An object of class "lmList4.confint"
, , (Intercept)

        2.5 %    97.5 %
2008 851.5772 1232.7420
2010 750.7456 1093.5956
2012 652.4601  909.3066
2014 831.1775 1151.4144
2020 924.4730 1253.7536

, , N_kgha

          2.5 %   97.5 %
2008 -20.212563 18.92119
2010 -15.414979 37.75965
2012  -5.226731 33.90692
2014 -21.227047 18.73268
2020 -16.692607 24.01057

, , arain_tot

           2.5 %      97.5 %
2008 -0.30004787 0.006600288
2010 -0.10817053 0.169953714
2012 -0.02756093 0.167257120
2014 -0.06032937 0.146239493
2020 -0.15537746 0.007320686

, , I(N_kgha^2)

           2.5 %    97.5 %
2008 -0.23746563 0.1828784
2010 -0.48434158 0.2125813
2012 -0.35067693 0.1316518
2014  0.02308014 0.4811122
2020 -0.13184247 0.3907813

, , P_kgha

          2.5 %   97.5 %
2008 20.3594408 40.05834
2010  5.6787836 20.18501
2012  0.2769588 10.09950
2014  2.1157736 18.16882
2020  5.2370165 20.69551

, , I(P_kgha^2)

           2.5 %      97.5 %
2008 -0.27601859 -0.07321441
2010 -0.06410419  0.02697241
2012 -0.05763608 -0.00446430
2014 -0.09576193  0.04581242
2020 -0.13574587  0.02323890

, , ncrops

         2.5 %     97.5 %
2008 -178.6565 -100.40203
2010 -179.5089  -93.94219
2012 -116.5103  -49.57475
2014 -175.8154  -97.36228
2020 -149.6249  -65.18356

, , hhmem

          2.5 %    97.5 %
2008  -7.030027 19.455597
2010  -5.549711 18.587984
2012 -12.486686  5.670144
2014  -5.308742 19.806718
2020  -8.349745 13.657816

, , femhead

          2.5 %    97.5 %
2008 -134.77203 41.935053
2010 -120.81385 51.819853
2012  -98.30105 47.898185
2014 -163.60194 20.086269
2020 -168.80193  5.187982

, , headeduc

          2.5 %    97.5 %
2008  -8.722556 128.05044
2010 -35.106251  95.93926
2012  -4.625432  90.14791
2014  22.538110 138.98054
2020  -7.572587 103.06831

, , N_kgha:arain_tot

           2.5 %     97.5 %
2008 -0.00830031 0.02757310
2010 -0.03417147 0.01731397
2012 -0.02457973 0.01763701
2014 -0.00956929 0.02455279
2020 -0.01193862 0.01697305

, , arain_tot:I(N_kgha^2)

             2.5 %       97.5 %
2008 -0.0001671711 2.098499e-04
2010 -0.0001420191 5.372822e-04
2012 -0.0001297723 3.732505e-04
2014 -0.0003571883 3.502187e-05
2020 -0.0002619389 1.242277e-04
Code
plot(ci,cex=2, cex.lab=3, xlab="Maize yield (kg/ha) response", ylab="Year")

Code
# Soil properties Interacted with soils 
baseline_quad_lmList_inter_soil <- lmList(yld_~N_kgha+I(N_kgha^2)+P_kgha+I(P_kgha^2)+ncrops+hhmem+femhead+headeduc+arain_tot+population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm|year,tz_lsms_panel)



coef(baseline_quad_lmList_inter_soil )
     (Intercept)   N_kgha I(N_kgha^2)    P_kgha  I(P_kgha^2)     ncrops
2008    344.9938 5.590200  0.02299555 26.616936 -0.165073591 -138.68058
2010    690.6962 0.773424  0.07314905 12.300742 -0.021394571 -138.01132
2012    771.6599 8.455786  0.02212014  4.660432 -0.029571214  -82.02707
2014   -516.9310 4.590467  0.10291821  7.152035 -0.005874209 -132.39962
2020    253.4637 4.348198  0.06611582  8.808098 -0.014822935 -109.77742
         hhmem    femhead headeduc   arain_tot population_density
2008 -2.315560  -67.76180 82.97465 -0.05433200        0.025867721
2010  3.571691  -36.73104 40.04794 -0.09400389       -0.008371859
2012 -6.084241  -15.27969 48.52885  0.04399272        0.029982376
2014  5.462428  -97.61363 91.78161 -0.02309245        0.100187964
2020 -2.442170 -110.14636 67.24755 -0.10335165        0.034362749
     wc2.1_30s_elev sand_0_5cm nitrogen_0_5cm soc_5_15cm ECN_5_15cm  pH_0_5cm
2008     0.35170114  -2.292053       3.606550  -3.024491   6.584207  73.13013
2010     0.20446201  -6.385131       3.567519   1.787091   7.816707  81.13167
2012     0.18420098  -3.362229    -168.001221  15.149161   1.371189  10.22203
2014     0.07857355  -1.536386    -108.699731  18.295217  17.398346 238.60303
2020     0.33614694  -3.185741      13.873565  -4.340038  18.168124 122.47137
Code
(ci <- confint(baseline_quad_lmList_inter_soil))
An object of class "lmList4.confint"
, , (Intercept)

          2.5 %    97.5 %
2008  -518.5343 1208.5220
2010  -203.2316 1584.6240
2012    57.7870 1485.5328
2014 -1473.5096  439.6476
2020  -652.3841 1159.3116

, , N_kgha

         2.5 %    97.5 %
2008  0.264545 10.915856
2010 -4.873741  6.420588
2012  3.522538 13.389034
2014 -2.097005 11.277939
2020 -1.913694 10.610090

, , I(N_kgha^2)

            2.5 %     97.5 %
2008 -0.035128912 0.08112001
2010  0.003948300 0.14234979
2012 -0.038266633 0.08250691
2014  0.029074135 0.17676228
2020 -0.005021225 0.13725287

, , P_kgha

          2.5 %   97.5 %
2008 16.9474209 36.28645
2010  5.1808885 19.42060
2012 -0.1263167  9.44718
2014 -0.9473725 15.25144
2020  1.1320830 16.48411

, , I(P_kgha^2)

           2.5 %       97.5 %
2008 -0.26422751 -0.065919670
2010 -0.06586397  0.023074822
2012 -0.05555886 -0.003583565
2014 -0.07571676  0.063968346
2020 -0.09409896  0.064453091

, , ncrops

         2.5 %    97.5 %
2008 -178.1318 -99.22932
2010 -180.3439 -95.67873
2012 -115.2940 -48.76016
2014 -171.2909 -93.50831
2020 -152.8151 -66.73979

, , hhmem

          2.5 %    97.5 %
2008 -15.709708 11.078588
2010  -8.355385 15.498767
2012 -15.220813  3.052332
2014  -7.077640 18.002496
2020 -13.420815  8.536475

, , femhead

          2.5 %     97.5 %
2008 -154.98745  19.463844
2010 -122.51883  49.056749
2012  -87.89271  57.333342
2014 -187.92309  -7.304167
2020 -195.97019 -24.322536

, , headeduc

           2.5 %    97.5 %
2008  13.8068958 152.14241
2010 -25.8846660 105.98055
2012   0.8649579  96.19274
2014  32.7557275 150.80750
2020  11.9745654 122.52054

, , arain_tot

           2.5 %      97.5 %
2008 -0.19393481  0.08527082
2010 -0.22862578  0.04061800
2012 -0.04928536  0.13727081
2014 -0.12368931  0.07750440
2020 -0.18233717 -0.02436613

, , population_density

            2.5 %     97.5 %
2008 -0.038236033 0.08997147
2010 -0.047043873 0.03030015
2012 -0.008017947 0.06798270
2014  0.015844581 0.18453135
2020 -0.005110674 0.07383617

, , wc2.1_30s_elev

           2.5 %    97.5 %
2008  0.26584501 0.4375573
2010  0.12250200 0.2864220
2012  0.11670493 0.2516970
2014 -0.02390128 0.1810484
2020  0.23191527 0.4403786

, , sand_0_5cm

          2.5 %     97.5 %
2008  -6.246601  1.6624942
2010 -10.236959 -2.5333023
2012  -6.518363 -0.2060943
2014  -6.313210  3.2404376
2020  -7.622382  1.2509000

, , nitrogen_0_5cm

         2.5 %    97.5 %
2008 -128.8486 136.06170
2010 -120.9152 128.05021
2012 -271.6635 -64.33891
2014 -242.3619  24.96248
2020 -132.4154 160.16257

, , soc_5_15cm

          2.5 %    97.5 %
2008 -16.893939 10.844956
2010 -11.541484 15.115667
2012   4.443506 25.854817
2014   5.892234 30.698200
2020 -17.533768  8.853693

, , ECN_5_15cm

          2.5 %   97.5 %
2008  -5.960653 19.12907
2010  -5.737403 21.37082
2012 -10.299643 13.04202
2014  -1.980124 36.77682
2020  -2.203245 38.53949

, , pH_0_5cm

         2.5 %    97.5 %
2008 -31.02051 177.28078
2010 -28.05138 190.31471
2012 -75.92811  96.37217
2014 123.93310 353.27296
2020  14.81227 230.13047
Code
plot(ci,cex=2, cex.lab=3, xlab="Maize yield (kg/ha) response", ylab="Year")

Code
# Quadratic plateau or linear plateau
library(easynls)
tz_lsms_panel.temp <- tz_lsms_panel[, c("yld_","N_kgha")]
nls.QP <- nlsfit(tz_lsms_panel.temp, model = 4)
nls.QP$Model
[1] "y ~ (a + b * x + c * I(x^2)) * (x <= -0.5 * b/c) + (a + I(-b^2/(4 * c))) * (x > -0.5 * b/c)"
Code
mn=nls.QP$Parameters
mn
                                      N_kgha
coefficient a                  -3.909774e-01
coefficient b                   1.052253e-02
coefficient c                  -1.090000e-06
p-value t.test for a            2.841850e-01
p-value t.test for b            0.000000e+00
p-value t.test for c            0.000000e+00
r-squared                       8.650000e-02
adjusted r-squared              8.640000e-02
AIC                             8.165610e+04
BIC                             8.168471e+04
maximum or minimum value for y  2.499715e+01
critical point in x             4.825482e+03
Code
summary(mn)
     N_kgha        
 Min.   :   -0.39  
 1st Qu.:    0.00  
 Median :    0.09  
 Mean   :14015.95  
 3rd Qu.: 1225.12  
 Max.   :81684.71  

5.1.1 Site-year specific Quadratic Only response functions

The site-year specific Quadratic response function can be modeled as At level 1, we have \[ Y_{i} = a_{i} + b_{i}*N + c_{i}*N^2 + \varepsilon_{i} \] At level 2, we have \[ a_{i} \sim N(a_0, \sigma_{a}^2) \\ b_{i} \sim N(b_0, \sigma_{b}^2) \\ c_{i} \sim N(c_0, \sigma_{c}^2) \\ \] This model can be estimated with the linear mixed model function lmer in R package lme4

Code
tz_lsms_panel.temp <- tz_lsms_panel[, .(yld_, N_kgha, N2 = N_kgha^2, year)]
lmer.Q <- lmer(yld_ ~ 1 + N_kgha + N2  + (1 | year) + (0 + N_kgha | year) + (0 + N2 | year), data = tz_lsms_panel.temp)
lmer.Q
Linear mixed model fit by REML ['lmerMod']
Formula: yld_ ~ 1 + N_kgha + N2 + (1 | year) + (0 + N_kgha | year) + (0 +  
    N2 | year)
   Data: tz_lsms_panel.temp
REML criterion at convergence: 152692.7
Random effects:
 Groups   Name        Std.Dev. 
 year     (Intercept) 8.001e+01
 year.1   N_kgha      2.748e+00
 year.2   N2          6.472e-03
 Residual             7.960e+02
Number of obs: 9426, groups:  year, 5
Fixed Effects:
(Intercept)       N_kgha           N2  
  748.29010     11.24225      0.01232  
fit warnings:
Some predictor variables are on very different scales: consider rescaling
optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings 
Code
summary(lmer.Q)
Linear mixed model fit by REML ['lmerMod']
Formula: yld_ ~ 1 + N_kgha + N2 + (1 | year) + (0 + N_kgha | year) + (0 +  
    N2 | year)
   Data: tz_lsms_panel.temp

REML criterion at convergence: 152692.7

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.3333 -0.6213 -0.3108  0.2772  6.6397 

Random effects:
 Groups   Name        Variance  Std.Dev. 
 year     (Intercept) 6.402e+03 8.001e+01
 year.1   N_kgha      7.554e+00 2.748e+00
 year.2   N2          4.188e-05 6.472e-03
 Residual             6.337e+05 7.960e+02
Number of obs: 9426, groups:  year, 5

Fixed effects:
             Estimate Std. Error t value
(Intercept) 748.29010   36.88270  20.288
N_kgha       11.24225    1.75175   6.418
N2            0.01232    0.01525   0.808

Correlation of Fixed Effects:
       (Intr) N_kgha
N_kgha -0.051       
N2      0.046 -0.657
fit warnings:
Some predictor variables are on very different scales: consider rescaling
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')

Or, althernatively, can be estimated with the non-linear mixed model function nlme

Code
library(nlme)
nlme.Q <- nlme(yld_ ~ (a + b * N_kgha + c * (N_kgha^2)),
                    data = tz_lsms_panel,
                    fixed = a + b + c ~ 1,
                    random = a + b + c ~ 1,
                    groups = ~ year,
                    start = c(800, 10, -0.001))

nlme.Q
Nonlinear mixed-effects model fit by maximum likelihood
  Model: yld_ ~ (a + b * N_kgha + c * (N_kgha^2)) 
  Data: tz_lsms_panel 
  Log-likelihood: -76345.33
  Fixed: a + b + c ~ 1 
           a            b            c 
747.82829275  11.16211183   0.01323599 

Random effects:
 Formula: list(a ~ 1, b ~ 1, c ~ 1)
 Level: year
 Structure: General positive-definite, Log-Cholesky parametrization
         StdDev       Corr         
a         57.45976820 a      b     
b          1.64304646 -0.022       
c          0.02343941  0.946 -0.341
Residual 795.98608057              

Number of Observations: 9426
Number of Groups: 5 

5.1.2 Site-year specific Quadratic-plus-plateau response functions

The site-year specific response function can be modeled using a hierarchical quadratic-plus-plateau model. At level 1, we have \[ Y_{i} = \min(a_{i} + b_{i}*N + c_{i}*N^2, Y_{max}) + \varepsilon_{i} \] At level 2, we have \[ a_{i} \sim N(a_0, \sigma_{a}^2) \\ b_{i} \sim N(b_0, \sigma_{b}^2) \\ c_{i} \sim N(c_0, \sigma_{c}^2) \\ Y_{max} = a_{i} - b_{i}^2/(4*c_i) \]

It seems the R function nlme would work to estimate this model

Code
# Define quadratic-plus-plateau function
mq <- lm(yld_ ~ N_kgha + I(N_kgha^2), data=tz_lsms_panel)
a0 <- coef(mq)[[1]]
b0 <- coef(mq)[[2]]
c0 <- coef(mq)[[3]]
clx0 <- -0.5*b0/c0

# Test nls

# fx.QP <- function(N, a, b, c) {
#   y <- (a + b * N + c * I(N^2)) * (N <= -0.5 * b/c) + (a + I(-b^2/(4 * c))) * (N > -0.5 * b/c)
#   return(y)
# }
# 
# nls.QP <- nls(Y ~ fx.QP(N, a, b, c),
#             start = list(a = a0, b = b0, c = c0), data = dat.Puntel.CC.mean,
#             control = nls.control(maxiter = 6000))


# quadplat <- function(x, a, b, clx) {
#   ifelse(x  < clx, a + b * x   + (-0.5*b/clx) * x   * x, 
#                             a + b * clx + (-0.5*b/clx) * clx * clx)
# }
# 
# nls.QP <- nls(Y ~ quadplat(N, a, b, clx), 
#             start = list(a = a0, b = b0, clx = clx0), data = dat.Puntel.CC.mean, 
#             control = nls.control(maxiter = 6000))

# nlme.QP <- nlme(Y ~ fx.QP(N, a, b, c),
#                     data = dat.Puntel.CC.mean,
#                     fixed = a + b + c ~ 1,
#                     random = a + b + c ~ 1,
#                     groups = ~ year,
#                     start = c(a0, b0, c0))
# 
# nlme.QP

# tz_lsms_panel.nlme <- nlme(yld_ ~ (a + b * N_kgha  + c * I(N_kgha  ^2)) * (N_kgha  <= (-0.5 * b/c)) + (a- I(b^2/(4 * c))) * (N_kgha  > (-0.5 * b/c)),
#                     data = tz_lsms_panel,
#                     fixed = a + b + c ~ 1,
#                     random = a + b + c ~ 1,
#                     groups = ~ year,
#                     start = c(a0, b0, c0))
# tz_lsms_panel.nlme

5.1.3 Bayesian analysis

Code
# library(brms)
# 
# tz_lsms_panel$Nsq_kgha=tz_lsms_panel$N_kgha^2
# tz_lsms_panel$Y=tz_lsms_panel$yld_
# 
# f1 <- Y ~ (a+ b*N_kgha+c*(Nsq_kgha))*(N_kgha<=(-0.5*b/c))+(a-(b*b/(4*c)))*(N_kgha>(-0.5*b/c))
# 
# prior_1=c(set_prior("normal(5,1)", nlpar="a"),
# set_prior("normal(0,1)", nlpar="b"),
# set_prior("normal(0,1)", nlpar="c"))
# 
# form=bf(f1,nl=TRUE)+list(a~1|year,b~1|year,c~1|year)
# 
# bayesQP=brm(form,prior=prior_1,data=tz_lsms_panel)
# 
# summary(bayesQP)            

5.2 Nonlinear parameter models: geoadditive model

Code
set.seed(321)

library(bamlss)

# Linear
f1 <- yld_~N_kgha + plotha + P_kgha + ncrops + hhmem + femhead + headeduc + arain_tot +population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm+ region + year

b1 <- bamlss(f1, data = tz_lsms_panel, family = "gaussian", n.iter = 12000, burnin = 2000, thin = 10)
AICc 147908.5 logPost -74304.0 logLik -73902.9 edf 51.000 eps 0.2856 iteration   1
AICc 147906.0 logPost -74302.7 logLik -73901.7 edf 51.000 eps 0.0008 iteration   2
AICc 147906.0 logPost -74302.7 logLik -73901.7 edf 51.000 eps 0.0000 iteration   3
AICc 147906.0 logPost -74302.7 logLik -73901.7 edf 51.000 eps 0.0000 iteration   3
elapsed time:  0.50sec
Starting the sampler...

|                    |   0% 11.19min
|*                   |   5%  9.84min 31.06sec
|**                  |  10%  9.64min  1.07min
|***                 |  15%  8.59min  1.52min
|****                |  20%  8.15min  2.04min
|*****               |  25%  7.60min  2.53min
|******              |  30%  6.96min  2.98min
|*******             |  35%  6.37min  3.43min
|********            |  40%  5.80min  3.87min
|*********           |  45%  5.36min  4.39min
|**********          |  50%  6.16min  6.16min
|***********         |  55%  6.91min  8.44min
|************        |  60%  7.05min 10.58min
|*************       |  65%  6.06min 11.26min
|**************      |  70%  5.02min 11.72min
|***************     |  75%  4.06min 12.18min
|****************    |  80%  3.17min 12.68min
|*****************   |  85%  2.34min 13.28min
|******************  |  90%  1.53min 13.80min
|******************* |  95% 45.58sec 14.43min
|********************| 100%  0.00sec 15.15min
Code
summary(b1)

Call:
bamlss(formula = f1, family = "gaussian", data = tz_lsms_panel, 
    n.iter = 12000, burnin = 2000, thin = 10)
---
Family: gaussian 
Link function: mu = identity, sigma = log
*---
Formula mu:
---
yld_ ~ N_kgha + plotha + P_kgha + ncrops + hhmem + femhead + 
    headeduc + arain_tot + population_density + wc2.1_30s_elev + 
    sand_0_5cm + nitrogen_0_5cm + soc_5_15cm + ECN_5_15cm + pH_0_5cm + 
    region + year
-
Parametric coefficients:
                         Mean       2.5%        50%      97.5% parameters
(Intercept)         6.836e+02  2.572e+02  6.835e+02  1.167e+03    718.979
N_kgha              8.540e+00  7.643e+00  8.548e+00  9.497e+00      8.524
plotha             -1.355e+02 -1.488e+02 -1.354e+02 -1.220e+02   -135.626
P_kgha              5.123e+00  3.680e+00  5.128e+00  6.571e+00      5.111
ncrops             -5.897e+01 -7.773e+01 -5.897e+01 -4.172e+01    -59.157
hhmem               1.506e+01  1.004e+01  1.510e+01  2.005e+01     15.087
femhead            -8.342e+01 -1.212e+02 -8.321e+01 -4.781e+01    -83.891
headeduc            6.202e+01  3.804e+01  6.185e+01  8.733e+01     62.899
arain_tot          -1.518e-02 -6.238e-02 -1.561e-02  3.131e-02     -0.015
population_density  1.745e-02 -4.755e-03  1.767e-02  3.920e-02      0.017
wc2.1_30s_elev      2.102e-01  1.450e-01  2.119e-01  2.727e-01      0.212
sand_0_5cm         -4.607e+00 -6.584e+00 -4.619e+00 -2.640e+00     -4.718
nitrogen_0_5cm     -1.157e+02 -1.837e+02 -1.152e+02 -4.325e+01   -115.231
soc_5_15cm          5.693e-01 -5.749e+00  5.297e-01  6.781e+00      0.142
ECN_5_15cm         -3.531e-01 -7.610e+00 -3.118e-01  6.414e+00     -0.342
pH_0_5cm            3.682e+01 -2.261e+01  3.767e+01  9.200e+01     31.929
region2             2.288e+02  1.033e+02  2.301e+02  3.544e+02    238.404
region3             2.103e+02  9.507e+01  2.091e+02  3.412e+02    217.007
region4             4.040e+02  2.743e+02  4.022e+02  5.334e+02    411.067
region5             1.953e+02  8.349e+01  1.961e+02  3.086e+02    204.028
region6            -2.812e+01 -1.971e+02 -2.962e+01  1.413e+02    -21.629
region7             1.866e+02 -9.262e+01  1.872e+02  4.527e+02    197.038
region8             1.165e+02 -1.069e+01  1.148e+02  2.483e+02    122.315
region9             5.199e+01 -5.264e+01  5.154e+01  1.698e+02     58.037
region10            1.395e+02  3.408e+01  1.374e+02  2.509e+02    144.948
region11            1.306e+02  3.142e+01  1.323e+02  2.256e+02    135.092
region12            2.817e+02  1.859e+02  2.817e+02  3.750e+02    285.789
region13            7.941e+01 -4.642e+01  8.067e+01  2.035e+02     84.140
region14            1.115e+01 -8.079e+01  9.429e+00  1.056e+02     14.846
region15            4.619e+02  3.492e+02  4.606e+02  5.844e+02    468.989
region16           -6.294e+01 -1.786e+02 -6.270e+01  5.299e+01    -60.143
region17           -4.502e+01 -1.419e+02 -4.541e+01  5.608e+01    -42.919
region18           -1.188e+02 -2.415e+02 -1.181e+02  1.065e+01   -116.123
region19           -1.015e+02 -2.136e+02 -1.018e+02  1.421e+00    -94.799
region20            7.549e+00 -1.264e+02  7.136e+00  1.521e+02     11.918
region21            3.597e+02  2.386e+02  3.617e+02  4.916e+02    366.453
region22           -5.738e+01 -2.400e+02 -6.043e+01  1.210e+02    -53.623
region23            5.402e+02  3.681e+02  5.401e+02  7.164e+02    551.701
region24            3.749e+01 -9.324e+01  3.704e+01  1.663e+02     47.377
region25            1.743e+02 -1.426e+00  1.747e+02  3.541e+02    184.003
region26            2.649e+02 -5.512e+01  2.636e+02  5.864e+02    290.278
region51            3.174e+02 -1.600e+02  3.258e+02  7.533e+02    346.615
region52            1.123e+02 -4.717e+02  1.057e+02  6.942e+02    128.517
region53            1.020e+03  9.074e+01  1.017e+03  1.932e+03   1300.925
region54           -4.123e+01 -1.177e+03 -4.140e+01  1.102e+03    -34.228
region55           -2.190e+02 -7.323e+02 -2.257e+02  3.133e+02   -230.656
year2010            4.898e+01 -3.498e+00  4.879e+01  1.002e+02     48.151
year2012            1.602e+01 -3.383e+01  1.628e+01  6.654e+01     15.964
year2014            2.087e+02  1.542e+02  2.087e+02  2.612e+02    207.388
year2020            1.731e+02  1.152e+02  1.732e+02  2.287e+02    170.268
-
Acceptance probability:
         Mean    2.5%     50% 97.5%
alpha 0.62258 0.06191 0.62615     1
---
Formula sigma:
---
sigma ~ 1
-
Parametric coefficients:
             Mean  2.5%   50% 97.5% parameters
(Intercept) 6.610 6.595 6.610 6.624      6.607
-
Acceptance probability:
        Mean   2.5%    50% 97.5%
alpha 0.9959 0.9613 1.0000     1
---
Sampler summary:
-
DIC = 147903.9 logLik = -73926.97 pd = 49.9763
runtime = 909.5
---
Optimizer summary:
-
AICc = 147906 edf = 51 logLik = -73901.71
logPost = -74302.79 nobs = 9208 runtime = 0.5
Code
# Nonlinear
# f2=yld_~s(N_kgha)+s(plotha)+s(P_kgha)+s(ncrops)+s(hhmem)+femhead+headeduc+s(arain_tot)+ti(region)+ti(year)
#
# b2=bamlss(f2,data=tz_lsms_panel,family="gaussian", n.iter=12000, burnin=2000, thin=10)
#
#
# summary(b2)
# plot(b2, ask=FALSE)
# #

f3 <- list(
    yld_ ~ s(N_kgha, by = year) + plotha + P_kgha + ncrops + hhmem + femhead + headeduc + arain_tot +population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm,

    sigma ~ s(N_kgha, by = year) + plotha + P_kgha + ncrops + hhmem + femhead + headeduc + arain_tot+population_density+wc2.1_30s_elev+sand_0_5cm+nitrogen_0_5cm+soc_5_15cm+ ECN_5_15cm+pH_0_5cm
)

b3 <- bamlss(f3, data = tz_lsms_panel)
AICc 147510.5 logPost -74164.3 logLik -73700.7 edf 54.181 eps 0.3607 iteration   1
AICc 147076.9 logPost -73984.7 logLik -73480.2 edf 57.867 eps 0.0593 iteration   2
AICc 147025.5 logPost -73984.8 logLik -73450.9 edf 61.365 eps 0.0201 iteration   3
AICc 147005.0 logPost -73995.9 logLik -73438.3 edf 63.759 eps 0.0067 iteration   4
AICc 146988.8 logPost -74009.3 logLik -73426.1 edf 67.782 eps 0.0084 iteration   5
AICc 146981.5 logPost -74022.0 logLik -73419.7 edf 70.491 eps 0.0034 iteration   6
AICc 146980.6 logPost -74025.5 logLik -73418.4 edf 71.329 eps 0.0014 iteration   7
AICc 146980.5 logPost -74025.6 logLik -73418.3 edf 71.385 eps 0.0005 iteration   8
AICc 146980.4 logPost -74025.7 logLik -73418.2 edf 71.392 eps 0.0003 iteration   9
AICc 146980.4 logPost -74025.6 logLik -73418.2 edf 71.392 eps 0.0001 iteration  10
AICc 146980.4 logPost -74025.6 logLik -73418.2 edf 71.392 eps 0.0000 iteration  11
AICc 146980.4 logPost -74025.6 logLik -73418.2 edf 71.392 eps 0.0000 iteration  11
elapsed time: 14.60sec
Starting the sampler...

|                    |   0%  2.34min
|*                   |   5%  1.81min  5.72sec
|**                  |  10%  1.59min 10.58sec
|***                 |  15%  1.44min 15.24sec
|****                |  20%  2.46min 36.93sec
|*****               |  25%  3.08min  1.03min
|******              |  30%  3.33min  1.43min
|*******             |  35%  3.41min  1.84min
|********            |  40%  3.37min  2.24min
|*********           |  45%  3.25min  2.66min
|**********          |  50%  3.10min  3.10min
|***********         |  55%  2.91min  3.56min
|************        |  60%  2.66min  3.99min
|*************       |  65%  2.37min  4.40min
|**************      |  70%  2.06min  4.80min
|***************     |  75%  1.74min  5.21min
|****************    |  80%  1.40min  5.61min
|*****************   |  85%  1.07min  6.04min
|******************  |  90% 42.96sec  6.44min
|******************* |  95% 21.34sec  6.76min
|********************| 100%  0.00sec  6.84min
Code
summary(b3)

Call:
bamlss(formula = f3, data = tz_lsms_panel)
---
Family: gaussian 
Link function: mu = identity, sigma = log
*---
Formula mu:
---
yld_ ~ s(N_kgha, by = year) + plotha + P_kgha + ncrops + hhmem + 
    femhead + headeduc + arain_tot + population_density + wc2.1_30s_elev + 
    sand_0_5cm + nitrogen_0_5cm + soc_5_15cm + ECN_5_15cm + pH_0_5cm
-
Parametric coefficients:
                         Mean       2.5%        50%      97.5% parameters
(Intercept)         1.306e+02 -2.216e+02  1.314e+02  4.708e+02    123.318
plotha             -6.811e+01 -7.656e+01 -6.797e+01 -6.020e+01    -68.100
P_kgha              7.646e+00  5.150e+00  7.668e+00  1.005e+01      7.362
ncrops             -5.075e+01 -6.237e+01 -5.095e+01 -3.848e+01    -50.101
hhmem               6.663e+00  2.481e+00  6.703e+00  1.104e+01      6.827
femhead            -6.655e+01 -9.583e+01 -6.626e+01 -3.417e+01    -65.173
headeduc            6.431e+01  3.902e+01  6.456e+01  8.957e+01     66.178
arain_tot           2.440e-02 -1.025e-02  2.372e-02  6.130e-02      0.026
population_density  3.129e-02  3.099e-03  2.967e-02  6.872e-02      0.027
wc2.1_30s_elev      2.087e-01  1.753e-01  2.079e-01  2.416e-01      0.209
sand_0_5cm         -2.011e+00 -3.488e+00 -2.014e+00 -3.951e-01     -1.980
nitrogen_0_5cm     -2.361e+01 -7.042e+01 -2.411e+01  2.053e+01    -22.706
soc_5_15cm          3.764e+00 -9.591e-01  3.796e+00  8.454e+00      3.751
ECN_5_15cm          1.093e+01  3.716e+00  1.082e+01  1.869e+01     11.088
pH_0_5cm            9.797e+01  5.453e+01  9.717e+01  1.417e+02     97.893
-
Acceptance probability:
        Mean   2.5%    50% 97.5%
alpha 0.9860 0.9079 0.9994     1
-
Smooth terms:
                                  Mean      2.5%       50%     97.5% parameters
s(N_kgha,by=year):2008.tau21 4.154e+05 2.586e-01 1.437e+02 1.305e+06  1.545e+00
s(N_kgha,by=year):2008.alpha 1.000e+00 1.000e+00 1.000e+00 1.000e+00         NA
s(N_kgha,by=year):2008.edf   1.276e+00 9.788e-01 1.003e+00 3.212e+00  9.970e-01
s(N_kgha,by=year):2010.tau21 2.172e+06 7.961e+04 1.078e+06 1.023e+07  1.070e+06
s(N_kgha,by=year):2010.alpha 1.000e+00 1.000e+00 1.000e+00 1.000e+00         NA
s(N_kgha,by=year):2010.edf   3.774e+00 2.026e+00 3.676e+00 5.895e+00  3.722e+00
s(N_kgha,by=year):2012.tau21 1.675e+05 2.208e+01 1.164e+04 1.090e+06  2.920e-01
s(N_kgha,by=year):2012.alpha 1.000e+00 1.000e+00 1.000e+00 1.000e+00         NA
s(N_kgha,by=year):2012.edf   1.493e+00 1.000e+00 1.217e+00 3.476e+00  9.860e-01
s(N_kgha,by=year):2014.tau21 5.093e+06 3.200e+05 3.048e+06 2.105e+07  4.657e+06
s(N_kgha,by=year):2014.alpha 1.000e+00 1.000e+00 1.000e+00 1.000e+00         NA
s(N_kgha,by=year):2014.edf   4.188e+00 2.472e+00 4.147e+00 6.114e+00  4.599e+00
s(N_kgha,by=year):2020.tau21 2.628e+06 3.348e-01 9.525e+05 1.745e+07  2.488e+06
s(N_kgha,by=year):2020.alpha 1.000e+00 1.000e+00 1.000e+00 1.000e+00         NA
s(N_kgha,by=year):2020.edf   3.025e+00 9.842e-01 3.195e+00 5.860e+00  3.982e+00
---
Formula sigma:
---
sigma ~ s(N_kgha, by = year) + plotha + P_kgha + ncrops + hhmem + 
    femhead + headeduc + arain_tot + population_density + wc2.1_30s_elev + 
    sand_0_5cm + nitrogen_0_5cm + soc_5_15cm + ECN_5_15cm + pH_0_5cm
-
Parametric coefficients:
                         Mean       2.5%        50%      97.5% parameters
(Intercept)         5.717e+00  5.372e+00  5.720e+00  6.080e+00      5.723
plotha             -1.196e-01 -1.295e-01 -1.200e-01 -1.085e-01     -0.120
P_kgha              3.320e-03  1.577e-03  3.306e-03  5.029e-03      0.003
ncrops             -7.837e-02 -9.451e-02 -7.802e-02 -6.342e-02     -0.079
hhmem               7.382e-03  2.670e-03  7.470e-03  1.245e-02      0.007
femhead            -8.498e-02 -1.228e-01 -8.486e-02 -4.878e-02     -0.087
headeduc            1.187e-01  9.719e-02  1.186e-01  1.402e-01      0.120
arain_tot          -7.409e-06 -4.941e-05 -8.141e-06  3.337e-05      0.000
population_density  4.376e-05  1.671e-05  4.306e-05  7.286e-05      0.000
wc2.1_30s_elev      2.208e-04  1.870e-04  2.211e-04  2.574e-04      0.000
sand_0_5cm         -1.598e-03 -3.244e-03 -1.605e-03  1.458e-04     -0.002
nitrogen_0_5cm      6.620e-02  1.655e-02  6.616e-02  1.172e-01      0.068
soc_5_15cm         -4.202e-03 -9.697e-03 -3.902e-03  7.907e-04     -0.004
ECN_5_15cm          1.373e-02  6.786e-03  1.379e-02  2.149e-02      0.014
pH_0_5cm            1.371e-01  9.640e-02  1.380e-01  1.789e-01      0.137
-
Acceptance probability:
           Mean      2.5%       50% 97.5%
alpha 0.5250304 0.0003718 0.5129290     1
-
Smooth terms:
                                  Mean      2.5%       50%     97.5% parameters
s(N_kgha,by=year):2008.tau21 1.430e+01 3.628e-01 7.719e+00 7.216e+01     35.875
s(N_kgha,by=year):2008.alpha 7.969e-01 2.273e-01 9.106e-01 1.000e+00         NA
s(N_kgha,by=year):2008.edf   4.930e+00 2.675e+00 5.006e+00 7.216e+00      6.540
s(N_kgha,by=year):2010.tau21 1.512e+01 2.636e+00 1.133e+01 5.149e+01     12.185
s(N_kgha,by=year):2010.alpha 7.954e-01 1.618e-01 9.062e-01 1.000e+00         NA
s(N_kgha,by=year):2010.edf   5.950e+00 4.412e+00 5.936e+00 7.532e+00      6.019
s(N_kgha,by=year):2012.tau21 4.357e-01 9.376e-05 3.787e-02 3.966e+00      0.913
s(N_kgha,by=year):2012.alpha 9.205e-01 4.846e-01 9.908e-01 1.000e+00         NA
s(N_kgha,by=year):2012.edf   2.034e+00 1.003e+00 1.697e+00 4.857e+00      3.533
s(N_kgha,by=year):2014.tau21 4.887e+00 4.419e-01 2.869e+00 2.319e+01      4.782
s(N_kgha,by=year):2014.alpha 7.284e-01 5.818e-02 8.178e-01 1.000e+00         NA
s(N_kgha,by=year):2014.edf   4.330e+00 2.796e+00 4.219e+00 6.418e+00      4.710
s(N_kgha,by=year):2020.tau21 7.864e+00 5.636e-01 4.674e+00 3.574e+01     15.593
s(N_kgha,by=year):2020.alpha 7.815e-01 1.840e-01 8.803e-01 1.000e+00         NA
s(N_kgha,by=year):2020.edf   5.028e+00 3.118e+00 4.987e+00 7.193e+00      6.305
---
Sampler summary:
-
DIC = 146996.9 logLik = -73462.92 pd = 71.0341
runtime = 411.95
---
Optimizer summary:
-
AICc = 146980.5 edf = 71.3927 logLik = -73418.29
logPost = -74025.7 nobs = 9208 runtime = 14.6
Code
# plot(b3,ask=FALSE)

plot(b3, cex = 2.5, model = "mu", term = "s(N_kgha,by=year)")

Code
plot(b3, cex = 2.5, model = "sigma", term = "s(N_kgha,by=year")

6 Causal RF approach

6.1 Binary treatment

Code
library(grf)
library(policytree)

tz_lsms_panel_estim_fert <- subset(tz_lsms_panel, select = c("fert1_bin", "yld_", "N_kgha", "plotha", "P_kgha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year", "lat_modified", "lon_modified"))

tz_lsms_panel_estim_fert$region <- as.numeric(tz_lsms_panel_estim_fert$region)
tz_lsms_panel_estim_fert$year <- as.numeric(tz_lsms_panel_estim_fert$year)

library(tidyr)
tz_lsms_panel_estim_fert <- tz_lsms_panel_estim_fert %>% drop_na()


Y_cf_fert <- as.vector(tz_lsms_panel_estim_fert$yld_)
## Causal random forest -----------------

X_cf_fert <- subset(tz_lsms_panel_estim_fert, select = c("plotha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year"))

X_firststage_cf_fert <- subset(tz_lsms_panel_estim_fert, select = c("plotha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year"))


W_cf_fert_binary <- as.vector(tz_lsms_panel_estim_fert$fert1_bin)

# Probability random forest to create weights
W.multi_fert.forest_binary <- regression_forest(X_cf_fert, W_cf_fert_binary,
    equalize.cluster.weights = FALSE,
    seed = 2
)
W.hat.multi.all_fert_binary <- predict(W.multi_fert.forest_binary, estimate.variance = TRUE)$predictions


# Regression forest to get expected responses
Y.multi_fert.forest_binary <- regression_forest(X_cf_fert, Y_cf_fert,
    equalize.cluster.weights = FALSE,
    seed = 2
)

print(Y.multi_fert.forest_binary)
GRF forest object of type regression_forest 
Number of trees: 2000 
Number of training samples: 9208 
Variable importance: 
    1     2     3     4     5     6     7     8     9    10    11    12    13 
0.706 0.013 0.003 0.001 0.004 0.005 0.005 0.212 0.007 0.006 0.020 0.003 0.007 
   14    15 
0.005 0.003 
Code
varimp.multi_fert_binary <- variable_importance(Y.multi_fert.forest_binary)
Y.hat.multi.all_fert_binary <- predict(Y.multi_fert.forest_binary, estimate.variance = TRUE)$predictions

# Fit binary causal RF model
multi_fert.forest_binary <- causal_forest(X = X_cf_fert, Y = Y_cf_fert, W = W_cf_fert_binary, W.hat = W.hat.multi.all_fert_binary, Y.hat = Y.hat.multi.all_fert_binary, seed = 2)

varimp.multi_fert_cf_binary <- variable_importance(multi_fert.forest_binary)

# Average treatment effects
multi_fert_ate_binary <- average_treatment_effect(multi_fert.forest_binary, target.sample = "overlap")
multi_fert_ate_binary
 estimate   std.err 
296.77835  30.26994 
Code
multi_fert_binary_calibration <- test_calibration(multi_fert.forest_binary)
multi_fert_binary_calibration

Best linear fit using forest predictions (on held-out data)
as well as the mean forest prediction as regressors, along
with one-sided heteroskedasticity-robust (HC3) SEs:

                               Estimate Std. Error t value    Pr(>t)    
mean.forest.prediction          0.99724    0.10227  9.7509 < 2.2e-16 ***
differential.forest.prediction  1.25318    0.26830  4.6708 1.521e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6.2 Continuous treatment

Code
library(grf)
library(policytree)

tz_lsms_panel_estim_fert <- subset(tz_lsms_panel, select = c("fert1_bin", "yld_", "N_kgha", "plotha", "P_kgha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year", "lat_modified", "lon_modified"))

tz_lsms_panel_estim_fert$region <- as.numeric(tz_lsms_panel_estim_fert$region)
tz_lsms_panel_estim_fert$year <- as.numeric(tz_lsms_panel_estim_fert$year)

library(tidyr)
tz_lsms_panel_estim_fert <- tz_lsms_panel_estim_fert %>% drop_na()


Y_cf_fert <- as.vector(tz_lsms_panel_estim_fert$yld_)
## Causal random forest -----------------

X_cf_fert <- subset(tz_lsms_panel_estim_fert, select = c("plotha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year"))

X_firststage_cf_fert <- subset(tz_lsms_panel_estim_fert, select = c("plotha", "ncrops", "hhmem", "femhead", "headeduc", "arain_tot", "population_density", "wc2.1_30s_elev", "sand_0_5cm", "nitrogen_0_5cm", "soc_5_15cm", "ECN_5_15cm", "pH_0_5cm", "region", "year"))


W_cf_fert_continuous <- as.vector(tz_lsms_panel_estim_fert$N_kgha)

# Probability random forest to create weights
W.multi_fert.forest_continuous <- regression_forest(X_cf_fert, W_cf_fert_continuous,
    equalize.cluster.weights = FALSE,
    seed = 2
)
W.hat.multi.all_fert_continuous <- predict(W.multi_fert.forest_continuous, estimate.variance = TRUE)$predictions


# Regression forest to get expected responses
Y.multi_fert.forest_continuous <- regression_forest(X_cf_fert, Y_cf_fert,
    equalize.cluster.weights = FALSE,
    seed = 2
)

print(Y.multi_fert.forest_continuous)
GRF forest object of type regression_forest 
Number of trees: 2000 
Number of training samples: 9208 
Variable importance: 
    1     2     3     4     5     6     7     8     9    10    11    12    13 
0.706 0.013 0.003 0.001 0.004 0.005 0.005 0.212 0.007 0.006 0.020 0.003 0.007 
   14    15 
0.005 0.003 
Code
varimp.multi_fert_continuous <- variable_importance(Y.multi_fert.forest_continuous)
Y.hat.multi.all_fert_continuous <- predict(Y.multi_fert.forest_continuous, estimate.variance = TRUE)$predictions

# Fit continuous causal RF model
multi_fert.forest_continuous <- causal_forest(X = X_cf_fert, Y = Y_cf_fert, W = W_cf_fert_continuous, W.hat = W.hat.multi.all_fert_continuous, Y.hat = Y.hat.multi.all_fert_continuous, seed = 2)

varimp.multi_fert_cf_continuous <- variable_importance(multi_fert.forest_continuous)

# Average treatment effects
multi_fert_ate_continuous <- average_treatment_effect(multi_fert.forest_continuous, target.sample = "overlap")
multi_fert_ate_continuous
estimate  std.err 
9.921472 0.711926 
Code
multi_fert_continuous_calibration <- test_calibration(multi_fert.forest_continuous)
multi_fert_continuous_calibration

Best linear fit using forest predictions (on held-out data)
as well as the mean forest prediction as regressors, along
with one-sided heteroskedasticity-robust (HC3) SEs:

                               Estimate Std. Error t value    Pr(>t)    
mean.forest.prediction          0.99353    0.06987 14.2196 < 2.2e-16 ***
differential.forest.prediction  1.23712    0.26364  4.6925 1.369e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
library(ggridges)
library(dplyr)
library(ggplot2)

tau.multi_fert.forest_continuous <- predict(multi_fert.forest_continuous, target.sample = "all", estimate.variance = TRUE)

tau.multi_fert.forest_continuous <- as.data.frame(tau.multi_fert.forest_continuous)

tau.multi_fert.forest_X <- data.frame(tz_lsms_panel_estim_fert, tau.multi_fert.forest_continuous)

ggplot(tau.multi_fert.forest_X, aes(x = predictions, y = "", fill = factor(stat(quantile)))) +
    stat_density_ridges(
        geom = "density_ridges_gradient", calc_ecdf = TRUE,
        quantiles = 4, quantile_lines = TRUE
    ) +
    scale_y_discrete(expand = c(0.01, 0)) +
    scale_fill_viridis_d(name = "Quartiles") +
    expand_limits(y = 1) +
    theme_bw(base_size = 16) +
    labs(x = "N use effect", y = "Density")

7 Exploring heterogeneity in Conditional N Use Efficiencies from CRF

7.1 Causal RF

Code
library(ggplot2)
NperHa_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$N_kgha > 0), aes(N_kgha, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Applied N per ha", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
NperHa_CATE_N_plot

Code
Plotsize_CATE_N_plot <- ggplot(tau.multi_fert.forest_X, aes(plotha, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Plot size (ha)", y = "N use efficiency")
previous_theme <- theme_set(theme_bw(base_size = 16))
Plotsize_CATE_N_plot

Code
P_CATE_N_plot <- ggplot(tau.multi_fert.forest_X, aes(P_kgha, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "P", y = "N use efficiency")
previous_theme <- theme_set(theme_bw(base_size = 16))
P_CATE_N_plot

Code
Hhmem_CATE_N_plot <- ggplot(tau.multi_fert.forest_X, aes(hhmem, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "HHMEM", y = "N use efficiency")
previous_theme <- theme_set(theme_bw(base_size = 16))
Hhmem_CATE_N_plot

Code
# By year
NperHa_CATE_N_plot_yr <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$N_kgha > 0), aes(N_kgha, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    lims(x = c(0, 100)) +
    labs(x = "Applied N per ha", y = "N use efficiency") +
    theme_bw(base_size = 16) +
    facet_wrap(~year)

NperHa_CATE_N_plot_yr

Code
# By soil organic carbon

library(ggplot2)
soc_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$soc_5_15cm > 0), aes(soc_5_15cm, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Soil organic carbon (%)", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
soc_CATE_N_plot

Code
# By soil sand
library(ggplot2)
sand_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$sand_0_5cm > 0), aes(sand_0_5cm, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Sand (%)", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
sand_CATE_N_plot

Code
# Electrical conductivity
library(ggplot2)
soil_elec_cond_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$ECN_5_15cm > 0), aes(ECN_5_15cm, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Soil electrical conductivity", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
soil_elec_cond_CATE_N_plot

Code
# By density
library(ggplot2)
pop_density_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$population_density > 0), aes(population_density, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Population density", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
pop_density_CATE_N_plot

Code
# By elevation

library(ggplot2)
elev_CATE_N_plot <- ggplot(subset(tau.multi_fert.forest_X, tau.multi_fert.forest_X$wc2.1_30s_elev > 0), aes(wc2.1_30s_elev, predictions)) +
    geom_smooth(method = "loess", formula = y ~ x, col = "darkblue") +
    labs(x = "Elevation (masl)", y = "N use efficiency")

previous_theme <- theme_set(theme_bw(base_size = 16))
elev_CATE_N_plot

8 Are N use efficiencies falling overtime?

8.1 Causal RF

Code
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==1]=2008
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==2]=2010
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==3]=2012
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==4]=2014
tau.multi_fert.forest_X$year_det[tau.multi_fert.forest_X$year==5]=2020

Year_CATE_N_plot=ggplot(tau.multi_fert.forest_X,aes(year_det,predictions))+
  geom_smooth(method="loess",formula=y~x,col="darkblue")+
  labs(x="Year",y="N use efficiency")
previous_theme <- theme_set(theme_bw())
Year_CATE_N_plot

9 Double machine learning (DML)

10 Mapping the estimates

Code
library(ggridges)
library(dplyr)
library(ggplot2)

tau.multi_fert.forest_dummy <- predict(multi_fert.forest_binary, target.sample = "all", estimate.variance = TRUE)

tau.multi_fert.forest_dummy <- as.data.frame(tau.multi_fert.forest_dummy)

tau.multi_fert.forest_X_dummy <- data.frame(tz_lsms_panel_estim_fert, tau.multi_fert.forest_dummy)

ggplot(tau.multi_fert.forest_X_dummy, aes(x = predictions, y = "", fill = factor(stat(quantile)))) +
    stat_density_ridges(
        geom = "density_ridges_gradient", calc_ecdf = TRUE,
        quantiles = 4, quantile_lines = TRUE
    ) +
    scale_y_discrete(expand = c(0.01, 0)) +
    scale_fill_viridis_d(name = "Quartiles") +
    expand_limits(y = 1) +
    theme_bw(base_size = 16) +
    labs(x = "Inorgatic fert use effect", y = "Density")

Code
#
library(sp)
library(sf)
tau.multi_fert.forest_X_dummy_sp <- SpatialPointsDataFrame(cbind(tau.multi_fert.forest_X_dummy$lon_modified, tau.multi_fert.forest_X_dummy$lat_modified), data = tau.multi_fert.forest_X_dummy, proj4string = CRS("+proj=longlat +datum=WGS84"))


library(tmap)
tmap_mode("plot")
Nuse_effect_map <- tm_shape(tau.multi_fert.forest_X_dummy_sp) +
    tm_dots(col = "predictions", title = "Effect of N use on yield (kg/ha)", style = "quantile") +
    tm_layout(legend.outside = TRUE)
Nuse_effect_map

Code
tmap_save(Nuse_effect_map, "figures/Nuse_effect_map.png", width = 600, height = 600, asp = 0)


# N Use efficiency map
library(sp)
library(sf)
tau.multi_fert.forest_X_sp <- SpatialPointsDataFrame(cbind(tau.multi_fert.forest_X$lon_modified, tau.multi_fert.forest_X$lat_modified), data = tau.multi_fert.forest_X, proj4string = CRS("+proj=longlat +datum=WGS84"))


library(tmap)
tmap_mode("plot")
Nuse_efficiency_map <- tm_shape(tau.multi_fert.forest_X_sp) +
    tm_dots(col = "predictions", title = "NUE (kg Maize/Kg N)", style = "quantile") +
    tm_layout(legend.outside = TRUE)
Nuse_efficiency_map

Code
tmap_save(Nuse_efficiency_map, "figures/Nuse_efficiency_map.png", width = 600, height = 600, asp = 0)

# N Use  map
library(sp)
library(sf)

tau.multi_fert.forest_X_sp_small <- subset(tau.multi_fert.forest_X_sp, tau.multi_fert.forest_X_sp$N_kgha > 0)

library(tmap)
tmap_mode("plot")
Nuse_map <- tm_shape(tau.multi_fert.forest_X_sp_small) +
    tm_dots(col = "N_kgha", title = "N applied (Kg/ha)", style = "quantile") +
    tm_layout(legend.outside = TRUE)
Nuse_map

Code
tmap_save(Nuse_map, "figures/Nuse_map.png", width = 600, height = 600, asp = 0)

10.1 Map of N Use Efficiencies overtime

Code
library(tmap)
tmap_mode("plot")
Nuse_efficiency_map_yr <- tm_shape(tau.multi_fert.forest_X_sp) +
    tm_dots(col = "predictions", title = "NUE (kg Maize/Kg N)", style = "quantile") +
    tm_layout(legend.outside = TRUE) +
    tm_facets("year")
Nuse_efficiency_map_yr

Code
tmap_save(Nuse_efficiency_map_yr, "figures/Nuse_efficiency_map_yr.png", width = 600, height = 600, asp = 0)

11 Mapping Descriptive Statistics

Code
library(geodata)

TZ <- gadm(country = "TZ", level = 1, path = tempdir())
plot(TZ)

Code
TZ_sf= st_as_sf(TZ)
TZ_sp=as_Spatial(TZ_sf)
tau.multi_fert.forest_X_sp$region_name <- over(tau.multi_fert.forest_X_sp, TZ_sp[,"NAME_1"])


# Summary by region and by year
library(modelsummary)

tau.multi_fert.forest_X_sp_dt <-tau.multi_fert.forest_X_sp@data

tau.multi_fert.forest_X_sp_dt$region_name_final=tau.multi_fert.forest_X_sp_dt$region_name$NAME_1

NUE_cont <- datasummary(Heading("region_name")*region_name_final~ Heading("N_obs") * N + Heading("percent") * Percent()+ predictions*(Mean+SD),data = tau.multi_fert.forest_X_sp_dt, output="data.frame")

NUE_cont$N_obs=as.numeric(NUE_cont$N_obs)
NUE_cont$Mean=as.numeric(NUE_cont$Mean)
NUE_cont$SD=as.numeric(NUE_cont$SD)

library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('NUE_cont', 'NUE_cont.csv')"
        ),
        reactable(
            NUE_cont,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "NUE_cont"
        )
    )
)
Code
# Remove regions with fewer observations than 50
NUE_cont=subset(NUE_cont,NUE_cont$N_obs> 50)

NUE_cont_sf=merge(TZ_sf,NUE_cont, by.x="NAME_1",by.y="region_name")

# Map the 

library(tmap)
tmap_mode("plot")
tm_shape(NUE_cont_sf) +
  tm_polygons(col = "Mean", title = "Average NUE", style = "quantile") +
  tm_layout(legend.outside = TRUE)

12 Mapping results by year and region

Code
mean_na <- function(x) mean(x, na.rm = TRUE)
sd_na <- function(x) SD(x, na.rm = TRUE)
p25_na <- function(x) quantile(x, probs = 0.25, na.rm = TRUE)
median_na <- function(x) median(x, na.rm = TRUE)
p75_na <- function(x) quantile(x, probs = 0.75, na.rm = TRUE)

NUE_cont_region_year <- datasummary(Factor(year)+Factor(region_name_final)~Heading("N obs") * N + Heading("%") * Percent() + (predictions) * (mean_na + sd_na), data = tau.multi_fert.forest_X_sp_dt, output = "data.frame")

library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('NUE_cont_region_year', 'NUE_cont_region_year.csv')"
        ),
        reactable(
            NUE_cont_region_year,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "NUE_cont_region_year"
        )
    )
)
Code
# Mix
library(data.table)
tau.multi_fert.forest_X_sp_dt <- data.table (tau.multi_fert.forest_X_sp_dt)
tau.multi_fert.forest_X_sp_dt$One=1

NUE_cont_region_year_mix <- tau.multi_fert.forest_X_sp_dt[,.(
  Mean=base::mean(predictions, na.rm=TRUE),
  SD=sd(predictions, na.rm = TRUE),
  N_obs=sum(One, na.rm = TRUE)
  ), by =c("year","region_name_final")]

NUE_cont_region_year_mix$year_det[NUE_cont_region_year_mix$year==1]=2008
NUE_cont_region_year_mix$year_det[NUE_cont_region_year_mix$year==2]=2010
NUE_cont_region_year_mix$year_det[NUE_cont_region_year_mix$year==3]=2012
NUE_cont_region_year_mix$year_det[NUE_cont_region_year_mix$year==4]=2014
NUE_cont_region_year_mix$year_det[NUE_cont_region_year_mix$year==5]=2020

library(reactable)
library(htmltools)
library(fontawesome)

htmltools::browsable(
    tagList(
        tags$button(
            tagList(fontawesome::fa("download"), "Download as CSV"),
            onclick = "Reactable.downloadDataCSV('NUE_cont_region_year_mix', 'NUE_cont_region_year_mix.csv')"
        ),
        reactable(
            NUE_cont_region_year_mix,
            searchable = TRUE,
            defaultPageSize = 38,
            elementId = "NUE_cont_region_year_mix"
        )
    )
)
Code
NUE_cont_region_year_mix=subset(NUE_cont_region_year_mix,NUE_cont_region_year_mix$N_obs> 10)

NUE_cont_region_year_mix_sf=merge(TZ_sf,NUE_cont_region_year_mix, by.x="NAME_1",by.y="region_name_final")



library(tmap)
tmap_mode("plot")
NUE_year_region <- tm_shape(NUE_cont_region_year_mix_sf) +
    tm_polygons(col = "Mean", title = "NUE (kg Maize/Kg N)", style = "quantile") +
    tm_layout(legend.outside = TRUE) +
    tm_facets("year_det")
NUE_year_region

Code
tmap_save(NUE_year_region, "figures/NUE_year_region.png", width = 600, height = 600, asp = 0)